<?php

// Read the raw HTML that the rest of the script sanitizes.
$sText = file_get_contents('c:/text.txt');
if (false === $sText)
{
	// The file is missing or unreadable (file_get_contents() has already
	// raised its own warning). Continue with empty markup so the script
	// emits nothing instead of passing `false` on to the parser.
	$sText = '';
}

$oDom = new DOMDocument('1.0', 'UTF-8');
$oDom->encoding = 'UTF-8';
// NOTE(review): loadHTML() picks the input charset from the markup itself;
// it is not clear that setting ->encoding before loading affects parsing.
// Confirm with non-ASCII input that carries no <meta charset>.

if ('' !== $sText)
{
	// Real-world HTML is rarely well-formed. Collect libxml's parse errors
	// internally rather than hiding every diagnostic with "@", then restore
	// the previous error-handling mode.
	// Empty input is skipped entirely: loadHTML() rejects an empty source
	// (a warning on older PHP, a ValueError on PHP 8).
	$bPrevLibxmlErrors = libxml_use_internal_errors(true);
	$oDom->loadHTML($sText);
	libxml_clear_errors();
	libxml_use_internal_errors($bPrevLibxmlErrors);
}

// Make every hyperlink open in a new tab/window.
$aLinks = $oDom->getElementsByTagName('a');
foreach($aLinks as /* @var $oElement DOMElement */ $oElement)
{
	$oElement->setAttribute('target', '_blank');

	// A page opened through target="_blank" can reach back to the opener via
	// window.opener unless the link carries rel="noopener" ("noreferrer"
	// covers browsers that predate "noopener"). Tokens already present in
	// the rel attribute are kept; only the missing ones are appended.
	$aRel = preg_split('/\s+/', $oElement->getAttribute('rel'), -1, PREG_SPLIT_NO_EMPTY);
	$aRelLower = array_map('strtolower', $aRel);
	foreach (array('noopener', 'noreferrer') as $sRelToken)
	{
		if (!in_array($sRelToken, $aRelLower, true))
		{
			$aRel[] = $sRelToken;
		}
	}

	$oElement->setAttribute('rel', implode(' ', $aRel));
}

//$aLinks = $oDom->getElementsByTagName('img');
//foreach($aLinks as /* @var $oElement DOMElement */ $oElement)
//{
//	$oElement->setAttribute('src', '');
//}

// Rewrite the inline style of every element in the document:
//  - positioning declarations (position/left/right/top/bottom) are dropped;
//  - "height" is turned into "min-height";
//  - every other declaration is passed through untouched.
$aElements = $oDom->getElementsByTagName('*');
foreach($aElements as /* @var $oElement DOMElement */ $oElement)
{
	if (!$oElement->hasAttribute('style'))
	{
		continue;
	}

	$aOutStyles = array();
	$sStyle = trim($oElement->getAttribute('style'));
	$aStyles = explode(';', $sStyle);

	foreach ($aStyles as $sStyleItem)
	{
		// Split "name: value" once; the name is compared lower-cased and trimmed.
		$aStyleValue = explode(':', $sStyleItem, 2);
		$sName = trim(strtolower($aStyleValue[0]));

		if (in_array($sName, array('position', 'left', 'right', 'top', 'bottom'), true))
		{
			continue;
		}

		if ('height' === $sName)
		{
			// Declarations after the first usually start with whitespace
			// ("color:red; height:10px"). Strip it before prefixing, otherwise
			// the result is the invalid property "min- height".
			$aOutStyles[] = 'min-'.ltrim($sStyleItem);
		}
		else
		{
			// Kept byte-for-byte (including empty pieces) so values that
			// themselves contain ';' are reassembled unchanged by implode().
			$aOutStyles[] = $sStyleItem;
		}
	}

	$oElement->setAttribute('style', implode(';', $aOutStyles));
}

// Serialize the modified DOM back into an HTML string.
$sText = $oDom->saveHTML();

// Document-level wrappers and active/embedded content that must not appear
// in the output. The patterns are applied in the order listed.
$aStripPatterns = array(
	"'<!doctype[^>]*>'si",
	"'<html[^>]*>'si",
	"'</html>'si",
	"'<head[^>]*>.*?</head>'si",
	"'<link[^>]*>'si",
	"'<base[^>]*>'si",
	"'<title[^>]*>.*?</title>'si",
	"'<style[^>]*>.*?</style>'si",
	"'<script[^>]*>.*?</script>'si",
	"'</script>'si",
	"'<object[^>]*>.*?</object>'si",
	"'<embed[^>]*>.*?</embed>'si",
	"'<applet[^>]*>.*?</applet>'si",
	"'<mocha[^>]*>.*?</mocha>'si",
	"'<meta[^>]*>'si"
);
$sText = preg_replace($aStripPatterns, '', $sText);

// Turn <body ...> / </body> into <div ...> / </div>, keeping any attributes.
$sText = preg_replace('/<([\/]{0,1})body([^>]*?)>/im', '<\\1div\\2>', $sText);

// Flatten CR/LF/TAB to spaces first, then squeeze runs of spaces into one.
$sText = preg_replace(
	array('/[\r\n\t]/m', '/[ ]+/'),
	array(' ', ' '),
	$sText
);

echo trim($sText);